import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import squarify
import missingno as mn
from wordcloud import WordCloud, STOPWORDS
# Load the accidents CSV into a DataFrame; the path is relative to the
# notebook's working directory.
data_frame = pd.read_csv("US_Accidents_Dec20.csv")
# Bare expression: the notebook renders a truncated preview of the frame.
data_frame
| ID | Severity | Start_Time | End_Time | Start_Lat | Start_Lng | End_Lat | End_Lng | Distance(mi) | Description | ... | Roundabout | Station | Stop | Traffic_Calming | Traffic_Signal | Turning_Loop | Sunrise_Sunset | Civil_Twilight | Nautical_Twilight | Astronomical_Twilight | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | A-2716600 | 3 | 2016-02-08 00:37:08 | 2016-02-08 06:37:08 | 40.10891 | -83.09286 | 40.11206 | -83.03187 | 3.230 | Between Sawmill Rd/Exit 20 and OH-315/Olentang... | ... | False | False | False | False | False | False | Night | Night | Night | Night |
| 1 | A-2716601 | 2 | 2016-02-08 05:56:20 | 2016-02-08 11:56:20 | 39.86542 | -84.06280 | 39.86501 | -84.04873 | 0.747 | At OH-4/OH-235/Exit 41 - Accident. | ... | False | False | False | False | False | False | Night | Night | Night | Night |
| 2 | A-2716602 | 2 | 2016-02-08 06:15:39 | 2016-02-08 12:15:39 | 39.10266 | -84.52468 | 39.10209 | -84.52396 | 0.055 | At I-71/US-50/Exit 1 - Accident. | ... | False | False | False | False | False | False | Night | Night | Night | Day |
| 3 | A-2716603 | 2 | 2016-02-08 06:15:39 | 2016-02-08 12:15:39 | 39.10148 | -84.52341 | 39.09841 | -84.52241 | 0.219 | At I-71/US-50/Exit 1 - Accident. | ... | False | False | False | False | False | False | Night | Night | Night | Day |
| 4 | A-2716604 | 2 | 2016-02-08 06:51:45 | 2016-02-08 12:51:45 | 41.06213 | -81.53784 | 41.06217 | -81.53547 | 0.123 | At Dart Ave/Exit 21 - Accident. | ... | False | False | False | False | False | False | Night | Night | Day | Day |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1516059 | A-4239402 | 2 | 2019-08-23 18:03:25 | 2019-08-23 18:32:01 | 34.00248 | -117.37936 | 33.99888 | -117.37094 | 0.543 | At Market St - Accident. | ... | False | False | False | False | False | False | Day | Day | Day | Day |
| 1516060 | A-4239403 | 2 | 2019-08-23 19:11:30 | 2019-08-23 19:38:23 | 32.76696 | -117.14806 | 32.76555 | -117.15363 | 0.338 | At Camino Del Rio/Mission Center Rd - Accident. | ... | False | False | False | False | False | False | Day | Day | Day | Day |
| 1516061 | A-4239404 | 2 | 2019-08-23 19:00:21 | 2019-08-23 19:28:49 | 33.77545 | -117.84779 | 33.77740 | -117.85727 | 0.561 | At Glassell St/Grand Ave - Accident. in the ri... | ... | False | False | False | False | False | False | Day | Day | Day | Day |
| 1516062 | A-4239405 | 2 | 2019-08-23 19:00:21 | 2019-08-23 19:29:42 | 33.99246 | -118.40302 | 33.98311 | -118.39565 | 0.772 | At CA-90/Marina Fwy/Jefferson Blvd - Accident. | ... | False | False | False | False | False | False | Day | Day | Day | Day |
| 1516063 | A-4239406 | 2 | 2019-08-23 18:52:06 | 2019-08-23 19:21:31 | 34.13393 | -117.23092 | 34.13736 | -117.23934 | 0.537 | At Highland Ave/Arden Ave - Accident. | ... | False | False | False | False | False | False | Day | Day | Day | Day |
1516064 rows × 47 columns
# Preview the first five rows.
data_frame.head()
| ID | Severity | Start_Time | End_Time | Start_Lat | Start_Lng | End_Lat | End_Lng | Distance(mi) | Description | ... | Roundabout | Station | Stop | Traffic_Calming | Traffic_Signal | Turning_Loop | Sunrise_Sunset | Civil_Twilight | Nautical_Twilight | Astronomical_Twilight | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | A-2716600 | 3 | 2016-02-08 00:37:08 | 2016-02-08 06:37:08 | 40.10891 | -83.09286 | 40.11206 | -83.03187 | 3.230 | Between Sawmill Rd/Exit 20 and OH-315/Olentang... | ... | False | False | False | False | False | False | Night | Night | Night | Night |
| 1 | A-2716601 | 2 | 2016-02-08 05:56:20 | 2016-02-08 11:56:20 | 39.86542 | -84.06280 | 39.86501 | -84.04873 | 0.747 | At OH-4/OH-235/Exit 41 - Accident. | ... | False | False | False | False | False | False | Night | Night | Night | Night |
| 2 | A-2716602 | 2 | 2016-02-08 06:15:39 | 2016-02-08 12:15:39 | 39.10266 | -84.52468 | 39.10209 | -84.52396 | 0.055 | At I-71/US-50/Exit 1 - Accident. | ... | False | False | False | False | False | False | Night | Night | Night | Day |
| 3 | A-2716603 | 2 | 2016-02-08 06:15:39 | 2016-02-08 12:15:39 | 39.10148 | -84.52341 | 39.09841 | -84.52241 | 0.219 | At I-71/US-50/Exit 1 - Accident. | ... | False | False | False | False | False | False | Night | Night | Night | Day |
| 4 | A-2716604 | 2 | 2016-02-08 06:51:45 | 2016-02-08 12:51:45 | 41.06213 | -81.53784 | 41.06217 | -81.53547 | 0.123 | At Dart Ave/Exit 21 - Accident. | ... | False | False | False | False | False | False | Night | Night | Day | Day |
5 rows × 47 columns
# Preview the last five rows.
data_frame.tail()
| ID | Severity | Start_Time | End_Time | Start_Lat | Start_Lng | End_Lat | End_Lng | Distance(mi) | Description | ... | Roundabout | Station | Stop | Traffic_Calming | Traffic_Signal | Turning_Loop | Sunrise_Sunset | Civil_Twilight | Nautical_Twilight | Astronomical_Twilight | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1516059 | A-4239402 | 2 | 2019-08-23 18:03:25 | 2019-08-23 18:32:01 | 34.00248 | -117.37936 | 33.99888 | -117.37094 | 0.543 | At Market St - Accident. | ... | False | False | False | False | False | False | Day | Day | Day | Day |
| 1516060 | A-4239403 | 2 | 2019-08-23 19:11:30 | 2019-08-23 19:38:23 | 32.76696 | -117.14806 | 32.76555 | -117.15363 | 0.338 | At Camino Del Rio/Mission Center Rd - Accident. | ... | False | False | False | False | False | False | Day | Day | Day | Day |
| 1516061 | A-4239404 | 2 | 2019-08-23 19:00:21 | 2019-08-23 19:28:49 | 33.77545 | -117.84779 | 33.77740 | -117.85727 | 0.561 | At Glassell St/Grand Ave - Accident. in the ri... | ... | False | False | False | False | False | False | Day | Day | Day | Day |
| 1516062 | A-4239405 | 2 | 2019-08-23 19:00:21 | 2019-08-23 19:29:42 | 33.99246 | -118.40302 | 33.98311 | -118.39565 | 0.772 | At CA-90/Marina Fwy/Jefferson Blvd - Accident. | ... | False | False | False | False | False | False | Day | Day | Day | Day |
| 1516063 | A-4239406 | 2 | 2019-08-23 18:52:06 | 2019-08-23 19:21:31 | 34.13393 | -117.23092 | 34.13736 | -117.23934 | 0.537 | At Highland Ave/Arden Ave - Accident. | ... | False | False | False | False | False | False | Day | Day | Day | Day |
5 rows × 47 columns
# (rows, columns) of the raw frame.
data_frame.shape
(1516064, 47)
# Row index of the raw frame.
data_frame.index
RangeIndex(start=0, stop=1516064, step=1)
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data_frame.describe()
| Severity | Start_Lat | Start_Lng | End_Lat | End_Lng | Distance(mi) | Number | Temperature(F) | Wind_Chill(F) | Humidity(%) | Pressure(in) | Visibility(mi) | Wind_Speed(mph) | Precipitation(in) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 1.516064e+06 | 1.516064e+06 | 1.516064e+06 | 1.516064e+06 | 1.516064e+06 | 1.516064e+06 | 4.699690e+05 | 1.473031e+06 | 1.066748e+06 | 1.470555e+06 | 1.479790e+06 | 1.471853e+06 | 1.387202e+06 | 1.005515e+06 |
| mean | 2.238630e+00 | 3.690056e+01 | -9.859919e+01 | 3.690061e+01 | -9.859901e+01 | 5.872617e-01 | 8.907533e+03 | 5.958460e+01 | 5.510976e+01 | 6.465960e+01 | 2.955495e+01 | 9.131755e+00 | 7.630812e+00 | 8.477855e-03 |
| std | 6.081481e-01 | 5.165653e+00 | 1.849602e+01 | 5.165629e+00 | 1.849590e+01 | 1.632659e+00 | 2.242190e+04 | 1.827316e+01 | 2.112735e+01 | 2.325986e+01 | 1.016756e+00 | 2.889112e+00 | 5.637364e+00 | 1.293168e-01 |
| min | 1.000000e+00 | 2.457022e+01 | -1.244976e+02 | 2.457011e+01 | -1.244978e+02 | 0.000000e+00 | 0.000000e+00 | -8.900000e+01 | -8.900000e+01 | 1.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 |
| 25% | 2.000000e+00 | 3.385422e+01 | -1.182076e+02 | 3.385420e+01 | -1.182077e+02 | 0.000000e+00 | 1.212000e+03 | 4.700000e+01 | 4.080000e+01 | 4.800000e+01 | 2.944000e+01 | 1.000000e+01 | 4.600000e+00 | 0.000000e+00 |
| 50% | 2.000000e+00 | 3.735113e+01 | -9.438100e+01 | 3.735134e+01 | -9.437987e+01 | 1.780000e-01 | 4.000000e+03 | 6.100000e+01 | 5.700000e+01 | 6.800000e+01 | 2.988000e+01 | 1.000000e+01 | 7.000000e+00 | 0.000000e+00 |
| 75% | 2.000000e+00 | 4.072593e+01 | -8.087469e+01 | 4.072593e+01 | -8.087449e+01 | 5.940000e-01 | 1.010000e+04 | 7.300000e+01 | 7.100000e+01 | 8.400000e+01 | 3.004000e+01 | 1.000000e+01 | 1.040000e+01 | 0.000000e+00 |
| max | 4.000000e+00 | 4.900058e+01 | -6.711317e+01 | 4.907500e+01 | -6.710924e+01 | 1.551860e+02 | 9.999997e+06 | 1.706000e+02 | 1.130000e+02 | 1.000000e+02 | 5.804000e+01 | 1.400000e+02 | 9.840000e+02 | 2.400000e+01 |
# List every column label in the raw frame.
data_frame.columns
Index(['ID', 'Severity', 'Start_Time', 'End_Time', 'Start_Lat', 'Start_Lng',
'End_Lat', 'End_Lng', 'Distance(mi)', 'Description', 'Number', 'Street',
'Side', 'City', 'County', 'State', 'Zipcode', 'Country', 'Timezone',
'Airport_Code', 'Weather_Timestamp', 'Temperature(F)', 'Wind_Chill(F)',
'Humidity(%)', 'Pressure(in)', 'Visibility(mi)', 'Wind_Direction',
'Wind_Speed(mph)', 'Precipitation(in)', 'Weather_Condition', 'Amenity',
'Bump', 'Crossing', 'Give_Way', 'Junction', 'No_Exit', 'Railway',
'Roundabout', 'Station', 'Stop', 'Traffic_Calming', 'Traffic_Signal',
'Turning_Loop', 'Sunrise_Sunset', 'Civil_Twilight', 'Nautical_Twilight',
'Astronomical_Twilight'],
dtype='object')
# Non-null counts for the numeric-like columns (boolean flags are included),
# computed once and reused for both the listing and the column total.
numeric_counts = data_frame.count(numeric_only=True)
print(numeric_counts)
print("Total No. of Numerical Columns:", len(numeric_counts))
Severity 1516064 Start_Lat 1516064 Start_Lng 1516064 End_Lat 1516064 End_Lng 1516064 Distance(mi) 1516064 Number 469969 Temperature(F) 1473031 Wind_Chill(F) 1066748 Humidity(%) 1470555 Pressure(in) 1479790 Visibility(mi) 1471853 Wind_Speed(mph) 1387202 Precipitation(in) 1005515 Amenity 1516064 Bump 1516064 Crossing 1516064 Give_Way 1516064 Junction 1516064 No_Exit 1516064 Railway 1516064 Roundabout 1516064 Station 1516064 Stop 1516064 Traffic_Calming 1516064 Traffic_Signal 1516064 Turning_Loop 1516064 dtype: int64 Total No. of Numerical Columns: 27
import missingno as missnum
# Per-column count of missing values, largest first.
missing_val = data_frame.isna().sum().sort_values(ascending=False)
# Keep only the columns that actually have gaps and express each count as a
# percentage of the total number of rows.
missing_percent = (
    missing_val.loc[missing_val.ne(0)]
    .div(len(data_frame))
    .mul(100)
)
print(" Missing Values in %\n", missing_percent)
Missing Values in % Number 69.000715 Precipitation(in) 33.675953 Wind_Chill(F) 29.637007 Wind_Speed(mph) 8.499773 Humidity(%) 3.001786 Visibility(mi) 2.916170 Weather_Condition 2.902714 Temperature(F) 2.838469 Wind_Direction 2.760965 Pressure(in) 2.392643 Weather_Timestamp 1.996222 Airport_Code 0.280199 Timezone 0.151841 Zipcode 0.061673 Sunrise_Sunset 0.005475 Civil_Twilight 0.005475 Nautical_Twilight 0.005475 Astronomical_Twilight 0.005475 City 0.005475 dtype: float64
# Names of the columns holding at least one missing value, in frame order.
# The null check runs once over the whole frame instead of once per column.
null_columns = [
    column_name
    for column_name, has_null in data_frame.isnull().any().items()
    if has_null
]
print(null_columns)
['Number', 'City', 'Zipcode', 'Timezone', 'Airport_Code', 'Weather_Timestamp', 'Temperature(F)', 'Wind_Chill(F)', 'Humidity(%)', 'Pressure(in)', 'Visibility(mi)', 'Wind_Direction', 'Wind_Speed(mph)', 'Precipitation(in)', 'Weather_Condition', 'Sunrise_Sunset', 'Civil_Twilight', 'Nautical_Twilight', 'Astronomical_Twilight']
# Missing-value count for every column (zeros included).
data_frame.isnull().sum()
ID 0 Severity 0 Start_Time 0 End_Time 0 Start_Lat 0 Start_Lng 0 End_Lat 0 End_Lng 0 Distance(mi) 0 Description 0 Number 1046095 Street 0 Side 0 City 83 County 0 State 0 Zipcode 935 Country 0 Timezone 2302 Airport_Code 4248 Weather_Timestamp 30264 Temperature(F) 43033 Wind_Chill(F) 449316 Humidity(%) 45509 Pressure(in) 36274 Visibility(mi) 44211 Wind_Direction 41858 Wind_Speed(mph) 128862 Precipitation(in) 510549 Weather_Condition 44007 Amenity 0 Bump 0 Crossing 0 Give_Way 0 Junction 0 No_Exit 0 Railway 0 Roundabout 0 Station 0 Stop 0 Traffic_Calming 0 Traffic_Signal 0 Turning_Loop 0 Sunrise_Sunset 83 Civil_Twilight 83 Nautical_Twilight 83 Astronomical_Twilight 83 dtype: int64
# Visualise where the gaps sit across the columns that contain missing values.
missnum.matrix(data_frame[null_columns]);
# Drop, in place, every row that has a missing value in ANY column.
# NOTE(review): 'Number' alone is ~69% missing, so this discards most of the
# rows -- consider dropping sparse columns first or using dropna(subset=...).
data_frame.dropna(inplace=True)
# Re-check the per-column missing counts after the drop.
data_frame.isnull().sum()
ID 0 Severity 0 Start_Time 0 End_Time 0 Start_Lat 0 Start_Lng 0 End_Lat 0 End_Lng 0 Distance(mi) 0 Description 0 Number 0 Street 0 Side 0 City 0 County 0 State 0 Zipcode 0 Country 0 Timezone 0 Airport_Code 0 Weather_Timestamp 0 Temperature(F) 0 Wind_Chill(F) 0 Humidity(%) 0 Pressure(in) 0 Visibility(mi) 0 Wind_Direction 0 Wind_Speed(mph) 0 Precipitation(in) 0 Weather_Condition 0 Amenity 0 Bump 0 Crossing 0 Give_Way 0 Junction 0 No_Exit 0 Railway 0 Roundabout 0 Station 0 Stop 0 Traffic_Calming 0 Traffic_Signal 0 Turning_Loop 0 Sunrise_Sunset 0 Civil_Twilight 0 Nautical_Twilight 0 Astronomical_Twilight 0 dtype: int64
# (rows, columns) remaining after the rows with missing values were dropped.
data_frame.shape
(334821, 47)
# Re-plot the missingness matrix for the same columns after the drop.
missnum.matrix(data_frame[null_columns]);
# List the current column labels.
data_frame.columns
Index(['ID', 'Severity', 'Start_Time', 'End_Time', 'Start_Lat', 'Start_Lng',
'End_Lat', 'End_Lng', 'Distance(mi)', 'Description', 'Number', 'Street',
'Side', 'City', 'County', 'State', 'Zipcode', 'Timezone',
'Airport_Code', 'Weather_Timestamp', 'Temperature(F)', 'Wind_Chill(F)',
'Humidity(%)', 'Pressure(in)', 'Visibility(mi)', 'Wind_Direction',
'Wind_Speed(mph)', 'Precipitation(in)', 'Weather_Condition', 'Amenity',
'Bump', 'Crossing', 'Give_Way', 'Junction', 'No_Exit', 'Railway',
'Roundabout', 'Station', 'Stop', 'Traffic_Calming', 'Traffic_Signal',
'Sunrise_Sunset', 'Civil_Twilight', 'Nautical_Twilight',
'Astronomical_Twilight', 'Day_of_Week'],
dtype='object')
from sklearn.model_selection import train_test_split
# Keep only the columns whose share of non-null values is at least 30%.
df1 = data_frame.loc[:, data_frame.count().div(len(data_frame)).ge(0.3)]
# Bar chart of the fraction of accidents at each severity level, ordered by level.
(
    data_frame['Severity']
    .value_counts(normalize=True)
    .sort_index()
    .plot(kind='bar')
)
plt.title('Severity')
plt.xlabel('Severity')
plt.ylabel('')
plt.grid();
# Collect the boolean flag columns by inspecting each column's dtype.
bool_dtype = np.dtype('bool')
boolean_columns = [
    column_name
    for column_name, column_dtype in data_frame.dtypes.items()
    if column_dtype == bool_dtype
]
booleandf = data_frame[boolean_columns]
# Rows in which more than one flag is set at the same time.
meta = booleandf[booleandf.sum(axis=1).gt(1)]
print(f'There are {len(meta)} metadata rows, which are {100*len(meta)/len(data_frame):.1f}% of the data')
There are 39052 metadata rows, which are 11.7% of the data
# Number of accidents flagged by each boolean column, shown as a pie chart.
booleans = booleandf.sum()
booleans.plot(kind='pie', figsize=(13, 13))
plt.title('Proximity to Traffic')
plt.ylabel('');
# Accident duration = End_Time - Start_Time, parsed with the dataset's fixed
# timestamp layout. The frame is `data_frame`; the name `df` used here
# previously is not defined anywhere in this notebook.
start = pd.to_datetime(data_frame['Start_Time'], format='%Y-%m-%d %H:%M:%S')
end = pd.to_datetime(data_frame['End_Time'], format='%Y-%m-%d %H:%M:%S')
d = end - start
# Whole minutes as floats (floor division). Series.astype('timedelta64[m]')
# is rejected by pandas >= 2.0, while total_seconds() works on every version
# and yields the same whole-minute values.
top20 = (d.dt.total_seconds() // 60).value_counts().nlargest(20)
print('Top 20 accident durations in US Accidents dataset data {:.1f}% '.format(top20.sum()*100/len(d)))
# Plot each of the 20 most common durations as a fraction of the top-20 total.
(top20 / top20.sum()).plot.bar(figsize=(14, 14))
plt.title('Overall Duration of Accident in Minutes')
plt.xlabel('Duration in Minutes')
plt.ylabel('Fraction');
Top 20 accident durations in US Accidents dataset data 37.3%
# Accident count per state, most frequent first.
count_statewise = data_frame['State'].value_counts()
# Ten states with the most accidents, plus one 'Others' bucket for the rest.
top10_statecount = count_statewise.iloc[:10]
other_statecount = {'Others': count_statewise.iloc[10:].sum()}
# Series.append was removed in pandas 2.0; pd.concat gives the same result
# on old and new pandas alike.
overall_count = pd.concat([top10_statecount, pd.Series(other_statecount)])
overall_count
CA 77394 FL 53115 OR 27582 PA 17228 SC 15570 NC 14972 TX 13706 MN 12003 NY 11469 VA 9993 Others 81789 dtype: int64
# Bar chart of the ten states with the most accidents.
plt.figure(figsize=(15, 10))
# x/y are passed by keyword: positional data arguments are deprecated in
# seaborn and stop working from version 0.12.
sns.barplot(x=top10_statecount.index, y=top10_statecount.values)
plt.show()
P:\UNCC Subjects\Machine Learning\Machine Learning Lab\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
# State-level choropleth of accident counts, restricted to a USA map.
fig = go.Figure(
    go.Choropleth(
        locations=count_statewise.index,
        z=count_statewise.values,
        locationmode='USA-states',
        colorscale='Reds',
        colorbar_title="Accidents",
    )
)
fig.update_layout(title_text='US Accidents States wise', geo_scope='usa', width=900, height=700)
fig.show()
# Accident count per California county, largest first.
california = (
    data_frame.loc[data_frame['State'].eq('CA')]
    .groupby('County')
    .size()
    .sort_values(ascending=False)
)
california.head()
County Los Angeles 18642 Kern 4266 San Bernardino 3529 Riverside 2883 Fresno 2741 dtype: int64
# Bar chart of the ten California counties with the most accidents.
plt.figure(figsize=(15, 10))
# x/y are passed by keyword: positional data arguments are deprecated in
# seaborn and stop working from version 0.12.
sns.barplot(x=california.index[:10], y=california.values[:10])
plt.show()
P:\UNCC Subjects\Machine Learning\Machine Learning Lab\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
# Pie chart of the ten busiest states; percentages are relative to those ten only.
plt.figure(figsize=(15, 10))
plt.pie(
    top10_statecount.to_numpy(),
    labels=top10_statecount.index,
    autopct='%1.1f%%',
)
plt.show()
# Pie chart of the five most frequent weather conditions; the largest slice
# is pulled out slightly for emphasis.
plt.figure(figsize=(15, 10))
(
    data_frame.groupby('Weather_Condition')
    .size()
    .sort_values(ascending=False)
    .head(5)
    .plot(kind='pie', explode=[0.1, 0, 0, 0, 0], autopct='%1.1f%%', shadow=True)
)
plt.show()
# Convert both timestamp columns to datetime64. Each column is parsed from
# itself: End_Time was previously filled from Start_Time, which silently
# made every accident's end equal to its start.
# infer_datetime_format is omitted because it is deprecated in pandas 2.x
# and the default parser already handles these timestamps.
data_frame['Start_Time'] = pd.to_datetime(data_frame['Start_Time'])
data_frame['End_Time'] = pd.to_datetime(data_frame['End_Time'])
# Weekday name of the accident start, then accident count per weekday
# ordered from the busiest day down.
data_frame['Day_of_Week'] = data_frame['Start_Time'].dt.day_name()
weekday_data = data_frame.groupby('Day_of_Week').size().sort_values(ascending=False)
weekday_data.head()
Day_of_Week Wednesday 55073 Thursday 54158 Friday 53505 Tuesday 52321 Monday 49018 dtype: int64
# Bar chart of accident counts per weekday, busiest day first.
plt.figure(figsize=(13, 7))
# x/y are passed by keyword: positional data arguments are deprecated in
# seaborn and stop working from version 0.12.
sns.barplot(x=weekday_data.index, y=weekday_data.values)
plt.show()
P:\UNCC Subjects\Machine Learning\Machine Learning Lab\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
1) Most US accidents (a fraction of about 0.8) have severity 2, followed by severity 4.
2) The largest share of US accidents occurred near traffic signals, crossings, stations, stops and amenities. The smallest share occurred near bumps, roundabouts, railways, no-exits and junctions.
3) The highest percentage of accidents occurred in California, followed by Florida.
4) Most accidents occurred in clear weather (52.9%), followed by cloudy weather (18.7%), which suggests that weather conditions have very little effect.
5) Accidents are more numerous on weekdays than on weekends.
from sklearn.model_selection import train_test_split
# Target is the severity label; features are every column from the third
# onward, which skips the first two columns of the frame.
x, y = data_frame.iloc[:, 2:], data_frame['Severity']
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)
X_train
| Start_Time | End_Time | Start_Lat | Start_Lng | End_Lat | End_Lng | Distance(mi) | Description | Number | Street | ... | Roundabout | Station | Stop | Traffic_Calming | Traffic_Signal | Sunrise_Sunset | Civil_Twilight | Nautical_Twilight | Astronomical_Twilight | Day_of_Week | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 271878 | 2020-11-07 13:38:30 | 2020-11-07 13:38:30 | 39.936237 | -75.142093 | 39.939962 | -75.143373 | 0.266 | Slow traffic from Washington Ave to I-95 N due... | 789.0 | S Christopher Columbus Blvd | ... | False | False | False | False | False | Day | Day | Day | Day | Saturday |
| 650573 | 2020-11-24 19:36:00 | 2020-11-24 19:36:00 | 33.993995 | -117.941435 | 33.992981 | -117.941334 | 0.070 | Incident on HEATHER HILL RD near HOUSE 1639 Dr... | 1637.0 | Heather Hill Rd | ... | False | False | False | False | False | Night | Night | Night | Night | Tuesday |
| 942345 | 2020-04-28 10:56:23 | 2020-04-28 10:56:23 | 38.865590 | -76.951310 | 38.865590 | -76.951310 | 0.000 | At Southern Ave - Accident. | 3799.0 | Alabama Ave SE | ... | False | False | False | False | True | Day | Day | Day | Day | Tuesday |
| 291539 | 2020-12-19 03:00:00 | 2020-12-19 03:00:00 | 36.384405 | -119.296784 | 36.385534 | -119.296739 | 0.078 | NB SR63 JNO AVENUE 328. RP IN A GRY NISS MORAN... | 32736.0 | Road 124 | ... | False | False | False | False | False | Night | Night | Night | Night | Saturday |
| 262377 | 2020-12-29 20:59:44 | 2020-12-29 20:59:44 | 35.278426 | -80.858179 | 35.279245 | -80.858575 | 0.061 | Incident on DAWNSHIRE AVE near NORTHBROOK DR D... | 3201.0 | Dawnshire Ave | ... | False | False | False | False | False | Night | Night | Night | Night | Tuesday |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 518354 | 2020-09-21 20:32:28 | 2020-09-21 20:32:28 | 40.019432 | -76.341253 | 40.019052 | -76.342442 | 0.068 | Incident on MILLERSVILLE PIKE near GABLE PARK ... | 1901.0 | Millersville Pike | ... | False | False | False | False | False | Night | Night | Night | Day | Monday |
| 933910 | 2020-04-23 18:12:27 | 2020-04-23 18:12:27 | 41.709690 | -87.779560 | 41.709690 | -87.779560 | 0.000 | At IL-43/S Harlem Ave - Accident. | 10099.0 | Southwest Hwy | ... | False | False | False | False | True | Day | Day | Day | Day | Thursday |
| 546628 | 2020-12-09 00:16:30 | 2020-12-09 00:16:30 | 38.978514 | -77.249367 | 38.985086 | -77.262616 | 0.844 | Stationary traffic on VA-193 from Spring Hill ... | 8915.0 | Georgetown Pike | ... | False | False | False | False | False | Night | Night | Night | Night | Wednesday |
| 582086 | 2020-11-16 19:28:00 | 2020-11-16 19:28:00 | 29.653686 | -82.496104 | 29.653890 | -82.496079 | 0.014 | Incident on SW 143RD ST near W NEWBERRY RD Dri... | 32.0 | SW 143rd St | ... | False | False | False | False | True | Night | Night | Night | Night | Monday |
| 523260 | 2020-10-27 22:40:10 | 2020-10-27 22:40:10 | 38.811299 | -77.211792 | 38.811322 | -77.210697 | 0.059 | Incident on BRADDOCK RD near BRADDOCK RD Drive... | 7817.0 | Braddock Rd | ... | False | False | False | False | False | Night | Night | Night | Night | Tuesday |
267856 rows × 44 columns
# Show the held-out feature rows.
X_test
| Start_Time | End_Time | Start_Lat | Start_Lng | End_Lat | End_Lng | Distance(mi) | Description | Number | Street | ... | Roundabout | Station | Stop | Traffic_Calming | Traffic_Signal | Sunrise_Sunset | Civil_Twilight | Nautical_Twilight | Astronomical_Twilight | Day_of_Week | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 800489 | 2020-06-26 15:27:42 | 2020-06-26 15:27:42 | 40.121560 | -76.059790 | 40.145220 | -76.116550 | 3.415 | Between PA-23/Main St and N Railroad Ave - Acc... | 972.0 | Weaverland Rd | ... | False | False | False | False | False | Day | Day | Day | Day | Friday |
| 1148669 | 2019-09-05 20:43:00 | 2019-09-05 20:43:00 | 37.698356 | -122.118806 | 37.698356 | -122.118806 | 0.000 | At 167th Ave/Elgin St - Accident. | 16001.0 | Ashland Ave | ... | False | False | False | False | True | Night | Night | Night | Day | Thursday |
| 783093 | 2020-05-21 17:26:01 | 2020-05-21 17:26:01 | 39.887260 | -83.057380 | 39.887260 | -83.057380 | 0.000 | At Gantz Rd - Accident. | 3727.0 | Gantz Rd | ... | False | False | False | False | False | Day | Day | Day | Day | Thursday |
| 1035713 | 2019-12-31 21:19:18 | 2019-12-31 21:19:18 | 42.427190 | -73.660870 | 42.438270 | -73.651210 | 0.910 | Closed between Garrigan Rd and Parker Hall Rd ... | 297.0 | Garrigan Rd | ... | False | False | False | False | False | Night | Night | Night | Night | Tuesday |
| 821532 | 2020-06-11 14:38:51 | 2020-06-11 14:38:51 | 35.402380 | -97.530170 | 35.402380 | -97.530170 | 0.000 | At I-240/Southwest Expy/SW 74th St - Accident. | 6397.0 | S Western Ave | ... | False | False | True | False | False | Day | Day | Day | Day | Thursday |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1002997 | 2020-02-14 14:02:00 | 2020-02-14 14:02:00 | 44.443170 | -95.917700 | 44.443170 | -95.917700 | 0.000 | At CR-3/150th Ave - Vehicle spun around. | 2500.0 | 190th Ave | ... | False | False | False | False | False | Day | Day | Day | Day | Friday |
| 253762 | 2020-11-20 00:30:00 | 2020-11-20 00:30:00 | 28.540728 | -81.277423 | 28.541453 | -81.277405 | 0.050 | Incident on S CHICKASAW TRL near NEIGHBORHOOD ... | 303.0 | S Chickasaw Trl | ... | False | False | False | False | False | Night | Night | Night | Night | Friday |
| 1016948 | 2020-02-26 19:25:00 | 2020-02-26 19:25:00 | 37.691154 | -122.093752 | 37.691154 | -122.093752 | 0.000 | At John Dr - Accident. | 19998.0 | Foothill Blvd | ... | False | True | True | False | False | Night | Night | Night | Night | Wednesday |
| 756570 | 2020-05-08 16:17:18 | 2020-05-08 16:17:18 | 43.210370 | -77.600910 | 43.210370 | -77.600910 | 0.000 | At CR-91/Titus Ave - Accident. | 2280.0 | Hudson Ave | ... | False | False | False | False | True | Day | Day | Day | Day | Friday |
| 284396 | 2020-09-26 14:15:30 | 2020-09-26 14:15:30 | 25.763390 | -80.336261 | 25.760970 | -80.336170 | 0.167 | Stationary traffic on FL-973 from W Flagler St... | 782.0 | SW 87th Ave | ... | False | False | False | False | False | Day | Day | Day | Day | Saturday |
66965 rows × 44 columns
# Show the training labels.
y_train
271878 2
650573 2
942345 2
291539 2
262377 2
..
518354 2
933910 3
546628 2
582086 2
523260 2
Name: Severity, Length: 267856, dtype: int64
# Show the held-out labels.
y_test
800489 2
1148669 2
783093 2
1035713 4
821532 2
..
1002997 2
253762 2
1016948 2
756570 2
284396 2
Name: Severity, Length: 66965, dtype: int64